# -------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
# -------------------------------------------------------------

# Autogenerated By   : src/main/python/generator/generator.py
# Autogenerated From : scripts/builtin/randomForest.dml

from typing import Dict, Iterable

from systemds.operator import OperationNode, Matrix, Frame, List, MultiReturn, Scalar
from systemds.script_building.dag import OutputType
from systemds.utils.consts import VALID_INPUT_TYPES


def randomForest(X: Matrix,
                 y: Matrix,
                 ctypes: Matrix,
                 **kwargs: Dict[str, VALID_INPUT_TYPES]) -> Matrix:
    r"""
    Create the operation node that invokes the ``randomForest`` builtin.

    This script implements random forest for recoded and binned categorical and
    numerical input features. In detail, we train multiple CART (classification
    and regression trees) decision trees in parallel and use them as an ensemble
    classifier/regressor. Each tree is trained on a sample of observations (rows)
    and optionally subset of features (columns). During tree construction, split
    candidates are additionally chosen on a sample of remaining features.

    .. code-block::

       For example, given a feature matrix with features [a,b,c,d]
       and the following two trees, M (the output) would look as follows:

       (L1)          |a<7|                   |d<5|
                    /     \                 /     \
       (L2)     |c<3|     |b<4|         |a<7|     P3:2
                /   \     /   \         /   \
       (L3)   P1:2 P2:1 P3:1 P4:2     P1:2 P2:1
       --> M :=
       [[1, 7, 3, 3, 2, 4, 0, 2, 0, 1, 0, 1, 0, 2],  (1st tree)
        [4, 5, 1, 7, 0, 2, 0, 2, 0, 1, 0, 0, 0, 0]]  (2nd tree)
        |(L1)| |  (L2)   | |        (L3)         |

       With feature sampling (feature_frac < 1), each tree is
       prefixed by a one-hot vector of sampled features
       (e.g., [1,1,1,0] if we sampled a,b,c of the four features)

    The three positional arguments and every entry of ``kwargs`` are handed
    to the operation as named inputs; ``kwargs`` is not validated here.

    :param X: Feature matrix in recoded/binned representation
    :param y: Label matrix in recoded/binned representation
    :param ctypes: Row-Vector of column types [1 scale/ordinal, 2 categorical]
        of shape 1-by-(ncol(X)+1), where the last entry is the y type
    :param num_trees: Number of trees to be learned in the random forest model
    :param sample_frac: Sample fraction of examples for each tree in the forest
    :param feature_frac: Sample fraction of features for each tree in the forest
    :param max_depth: Maximum depth of the learned tree (stopping criterion)
    :param min_leaf: Minimum number of samples in leaf nodes (stopping criterion)
    :param min_split: Minimum number of samples in leaf for attempting a split
    :param max_features: Parameter controlling the number of features used as split
        candidates at tree nodes: m = ceil(num_features^max_features)
    :param max_values: Parameter controlling the number of values per feature used
        as split candidates: nb = ceil(num_values^max_values)
    :param impurity: Impurity measure: entropy, gini (default), rss (regression)
    :param seed: Fixed seed for randomization of samples and split candidates
    :param verbose: Flag indicating verbose debug output
    :return: Matrix M containing the learned trees, in linearized form.
    """
    # NOTE: the docstring above is a raw string on purpose. The ASCII tree
    # diagram contains backslashes; in a normal string literal they are
    # invalid escape sequences (a warning on import) and, at line ends, line
    # continuations that silently join diagram rows.

    # Mandatory inputs first, then the caller-supplied optional parameters.
    params_dict = {'X': X, 'y': y, 'ctypes': ctypes}
    params_dict.update(kwargs)

    # The node is created in the same SystemDS context as the feature matrix.
    return Matrix(X.sds_context,
        'randomForest',
        named_input_nodes=params_dict)
